*********************************************************************************
*                                                                               *
*                  Election Frequency - UK Case Study 		     	            *
*                                                                               *
*          	               Data Preparation 							  		*
*                                                                               *
*********************************************************************************

global beg_path `"C:\Users\JohnSmith\Dropbox\Replication_EF\"'  // F - Home

 cd `"${beg_path}Data\Analysis 2A"'


*** Election results 2017
import excel using UKgeneral2017_2.xlsx, first clear
destring Turnout, replace
save UKgeneral2017_2.dta, replace

*** Election results 2015
import excel using 2015_results_complete.xlsx, first clear
rename Totalnumberofvalidvotescoun Total
rename C CON
rename Ind IND
rename Lab LAB
rename Speaker SPK

foreach var of varlist Alliance-UUP {
recode `var' (.=0), gen (`var'_)
drop `var'
} 
* parties in both years
foreach var of varlist Alliance_-UUP_ {
gen `var'15 =  `var'/Total*100
} 
* Other
gen Ot = 100-Alliance_15-CON_15-DUP_15-Green_15-IND_15-LAB_15-UUP_15-LD_15-PBP_15-PC_15-SDLP_15-SF_15-SNP_15-SPK_15-TUV_15-UKIP_15
gen Other_15 = round(Ot,0.01)
drop Ot
tab Other_15

drop Alliance_ - UUP_
save results_2015, replace

*** Election results 2017 (combined)
import excel using 2017_results_complete.xlsx, first clear
rename ONSCode ConstituencyID
* total valid votes in each constituency (the dataset does not cover all valid votes -> import from 2017)
merge m:1 Constituency using UKgeneral2017_2, gen (merge3) keepusing(Voted) 
drop merge3
* generate variables for parties
gen Alliance = 0
gen CON = 0
gen DUP = 0
gen Green = 0
gen IND = 0
gen LAB = 0
gen UUP = 0
gen LD = 0
gen PBP = 0
gen PC = 0
gen SDLP = 0
gen SF = 0
gen SNP = 0
gen SPK = 0
gen TUV = 0
gen UKIP = 0

replace Alliance = Validvotes if PartyIdentifer == "Alliance"
replace CON = Validvotes if PartyIdentifer == "Conservative"
replace DUP = Validvotes if PartyIdentifer == "DUP"
replace Green = Validvotes if PartyIdentifer == "Green Party"
replace IND = Validvotes if PartyIdentifer == "Independent"
replace LAB = Validvotes if PartyIdentifer == "Labour"
replace LD = Validvotes if PartyIdentifer == "Liberal Democrats"
replace UUP = Validvotes if PartyIdentifer == "UUP"
replace PBP = Validvotes if PartyIdentifer == "PBP Alliance"
replace PC = Validvotes if PartyIdentifer == "Plaid Cymru"
replace SDLP = Validvotes if PartyIdentifer == "SDLP"
replace SF = Validvotes if PartyIdentifer == "Sinn Féin"
replace SNP = Validvotes if PartyIdentifer == "SNP"
replace TUV = Validvotes if PartyIdentifer == "TUV"
replace UKIP = Validvotes if PartyIdentifer == "UKIP"

* drop & keeep only first 
drop PartyIdentifer Validvotes

* copy values within the group of party 
foreach var of varlist Alliance-UKIP {
gsort Constituency -`var'
bysort Constituency: gen `var'_ = `var'[1]
} 
* keep only one observation per Constituency
bysort ConstituencyID: keep if _n==1 

* calculation the share
foreach var of varlist Alliance_-UKIP_ {
gen `var'17 =  `var'/Voted*100
} 

* Other
gen Ot = 100-Alliance_17-CON_17-DUP_17-Green_17-IND_17-LAB_17-UUP_17-LD_17-PBP_17-PC_17-SDLP_17-SF_17-SNP_17-SPK_17-TUV_17-UKIP_17
gen Other_17 = round(Ot,0.01)
drop Ot
tab Other 

* drop 
drop Alliance-UKIP_

save results_2017, replace

*** Brexit vote in NI 
import excel using Brexit_NI.xlsx, first clear
drop ConstituencyID  // not right 
save Brexit_NI, replace

*** NI Assembly 2017 elections
import excel using NI_2017.xlsx, first clear
save NI_2017, replace


***************************************** data completion 

* data on 2015 general elections
import excel using UKgeneral2015.xlsx, first clear
gen Year = 2015
gen local_el = 0
gen coverage = 0
gen local_authorities_el = 0
gen coverage_loc_auth = 0
save UKgeneral2015, replace

* merge with 2017 to test whether constituencies the same 
use UKgeneral2017_2.dta
merge 1:1 Constituency using UKgeneral2015, gen (merge2)
* perfect match

* append  
use UKgeneral2017_2.dta, clear
gen Year = 2017
sort Constituency
gen ID = _n
append using UKgeneral2015

* label
label var Turnout "Total Vote Turnout"
label var Voted "Total Voted"

* copy variables from different observations (Type, Region, County, ID, Constituency ID)
sort Constituency Year
bysort Constituency (Type) : replace Type = Type[2]
bysort Constituency (Region) : replace Region = Region[2]
bysort Constituency (County) : replace County = County[2]
bysort Constituency (ID) : replace ID = ID[1]
bysort Constituency (ConstituencyID) : replace ConstituencyID = ConstituencyID[2]

****
gen CON = ., a(ConstituencyID)
gen LAB = ., a(CON)
gen LD = ., a(LAB)
gen SNP = ., a(LD)
gen PC = ., a(SNP)
gen UKIP = ., a(PC)
gen Green = ., a(UKIP)
gen DUP = ., a(Green)
gen SF = ., a(DUP)
gen IND = ., a(SF)
gen PBP = ., a(IND)
gen Alliance = ., a(PBP)
gen SDLP = ., a(Alliance)
gen SPK = ., a(SDLP)
gen TUV = ., a(SPK)
gen UUP = ., a(TUV)
gen Other = ., a(UUP)

*** merge with 2015 & 2017
merge m:1 ConstituencyID using results_2015, gen (merge3)
foreach var of varlist CON-Other {
replace `var' =  `var'_15 if Year == 2015
} 

merge m:1 ConstituencyID using results_2017, gen (merge4)
foreach var of varlist CON-Other {
replace `var' =  `var'_17 if Year == 2017
} 
drop ConstituencyName - merge4

***** first & second score 
egen first = rowmax(CON-Other)
gen second = .
foreach i of num 50/0.01 { 
foreach var of varlist CON-Other {
replace second = `var' if `var'>`i' &`var'!= first & second == .
} 
}

* difference betwen 1st and 2nd
gen difference = first - second


************************************
**** Brexit 
************************************
******** Pippa Norris data 
**** merge with PN dataset 
merge m:1 ConstituencyID using PN_data_2, gen (merge3) keepusing(BREXITLeave result17 Winner17 Winner15)
drop merge3
* ordering variables on Brexit (vote for leave), results (respective to both 17 & 15)
order BREXITLeave, a(Other)
order result17, a(BREXITLeave)
merge m:1 Constituency using Brexit_NI, gen (merge5) keepusing (Leave)
replace BREXITLeave = Leave if BREXITLeave ==.

* check whether some of the 2017 parties that is not included in 2015 results (DUP, SF, SDLP, UUP, ALLIANCE) did not win 
tab Winner17
tab Winner15
* -> DUP and SF in 2017 won  in 20 and 14 constitutiencies respectively. IND and SPK in 2 each (but do not have a separate category)

**** NI Assembly Elections
gen loc_el_NI = 0, a(coverage_loc_auth)
replace loc_el_NI = 1 if Region == "NI"&Year == 2017

gen coverage_NI  = 0, a(loc_el_NI)
replace coverage_NI = 100 if Region == "NI"&Year == 2017
label var BREXITLeave "Pippa Norris data - from Chris Hanrietty"

*** Preparation
******** Chris Hanrietty Data 
* import excel using Brexit_estimates_Hanretty.xlsx, first clear
* gen Brexit_Leave_Han = Brexit_Leave_original*100
* save Brexit_estimates_Hanretty, replace

* merge
merge m:1 ConstituencyID using Brexit_estimates_Hanretty, gen (merge6) keepusing(Brexit_Leave_Han)

drop merge5 merge6
save UK_dataset.dta, replace

************************************
**** Adding 2010 and 2019
************************************

*** 2010 general elections results
import excel using UKgeneral2010.xlsx, first clear
gen Year = 2010
gen local_el = 0
gen coverage = 0
gen local_authorities_el = 0
gen coverage_loc_auth = 0
fre Turnout 
list if Turnout == .
replace Turnout = (Voted/Registered)*100 if Turnout == . 
fre Turnout, all 
save UKgeneral2010, replace

*** 2019 general elections results (EU elections simultaneously)
import excel using UKgeneral2019.xlsx, first clear
drop valid_votes invalid_votes
gen Year = 2019
gen local_el = 0
gen coverage = 0
gen local_authorities_el = 0
gen coverage_loc_auth = 0
save UKgeneral2019, replace

* checking whether constitutencies in 2010 match (2019 do because we have ConstituencyID)
*use UK_2017_prep.dta
*merge 1:1 Constituency using UKgeneral2010, gen (merge1)
*perfect match

** append 2010 and 2019 elections to the original
use UK_dataset.dta, clear
append using UKgeneral2010

***** adding 2019 general elections results
append using UKgeneral2019

* copy variables from different observations (Type, Region, County, ID, Constituency ID)
sort Constituency Year
bysort Constituency (Type) : replace Type = Type[2]
bysort Constituency (Region) : replace Region = Region[2]
bysort Constituency (County) : replace County = County[2]
bysort Constituency (ID) : replace ID = ID[2]
bysort Constituency (ConstituencyID) : replace ConstituencyID = ConstituencyID[2]

*** adding labels 
label var Registered "Registered voters"
label var Voted "Total voted"
label var Turnout "Total turnout"
label var Type "Type of constituency"
label var Region "Region"
label var County "County"
label var local_el "Local elections (England)"
replace coverage = coverage / 100
label var coverage "Coverage of local elections (England, share)"
replace coverage_loc_auth = coverage_loc_auth / 100
label var local_authorities_el "Local authorities elections (Wales and Scotland, share)"
label var coverage_loc_auth "Coverage of local authorities elections (Wales and Scotland)"

fre Region
replace Region = "Wales" if Region == "wales" 

save UK_dataset_final, replace
 














